In [1]:
import akshare as ak
import pandas as pd
import plotly
from plotly.offline import iplot, init_notebook_mode
import plotly.express as px
from datetime import datetime

# Switch on plotly's offline notebook mode (imported from plotly.offline above)
# before any figure in this notebook is shown.
init_notebook_mode()
In [2]:
# Fetch the epidemic history table through akshare and display it.
# The cell output shows one row per date and country / province / city,
# with confirmed, suspected, cured and dead counts.
df_all_history = ak.epidemic_history()
df_all_history
Out[2]:
date country countryCode province provinceCode city cityCode confirmed suspected cured dead
0 2019-12-01 中国 CN 1 0 0 0
1 2019-12-01 中国 CN 湖北省 420000 1 0 0 0
2 2019-12-01 中国 CN 湖北省 420000 武汉市 420100 1 0 0 0
3 2019-12-02 中国 CN 1 0 0 0
4 2019-12-02 中国 CN 湖北省 420000 1 0 0 0
... ... ... ... ... ... ... ... ... ... ... ...
24148 2020-03-09 乌克兰 UA 1 0 0 0
24149 2020-03-09 美国 US 572 0 10 22
24150 2020-03-09 梵蒂冈 VA 1 0 0 0
24151 2020-03-09 越南 VN 30 0 16 0
24152 2020-03-09 南非 ZA 3 0 0 0

24153 rows × 11 columns

In [3]:
# Build the working frame as a NEW object instead of aliasing df_all_history.
# The raw download stays untouched, so re-running this cell always yields the
# same result (previously a second run copied the already-converted datetime
# column into 'dates').
#   dates - the original date values, kept as-is for use as plot labels
#   date  - the same values parsed to datetime, for time-based indexing/slicing
df_all = df_all_history.assign(
    dates=df_all_history['date'],
    date=pd.to_datetime(df_all_history['date']),
)
In [4]:
# Overseas data: keep every row whose country is not China ('中国').
# fillna is chained onto the query result instead of being applied with
# inplace=True, which avoids pandas' SettingWithCopyWarning on a filtered frame
# and keeps the cell a single, re-runnable expression.
df_oversea = df_all.query("country!='中国'").fillna(value="")
df_oversea
Out[4]:
date country countryCode province provinceCode city cityCode confirmed suspected cured dead dates
141 2020-01-16 日本 JP 1 0 0 0 2020-01-16
142 2020-01-16 泰国 TH 1 0 0 0 2020-01-16
171 2020-01-20 日本 JP 1 0 0 0 2020-01-20
172 2020-01-20 韩国 KR 1 0 0 0 2020-01-20
173 2020-01-20 泰国 TH 2 0 0 0 2020-01-20
... ... ... ... ... ... ... ... ... ... ... ... ...
24148 2020-03-09 乌克兰 UA 1 0 0 0 2020-03-09
24149 2020-03-09 美国 US 572 0 10 22 2020-03-09
24150 2020-03-09 梵蒂冈 VA 1 0 0 0 2020-03-09
24151 2020-03-09 越南 VN 30 0 16 0 2020-03-09
24152 2020-03-09 南非 ZA 3 0 0 0 2020-03-09

1614 rows × 12 columns

In [5]:
# Confirmed cases over time, one coloured line per overseas country.
fig_oversea = px.line(
    df_oversea,
    x='dates',
    y='confirmed',
    line_group='country',
    color='country',
    color_discrete_sequence=px.colors.qualitative.D3,
    hover_name='country',
)
fig_oversea.show()
In [6]:
# Overseas rows from 2020-02-10 onward: index by the parsed 'date' column and
# take a label-based slice starting at that day.
df_oversea_recent = df_oversea.set_index('date').loc['2020-02-10':]
df_oversea_recent
Out[6]:
country countryCode province provinceCode city cityCode confirmed suspected cured dead dates
date
2020-02-10 阿联酋 AE 7 0 0 0 2020-02-10
2020-02-10 澳大利亚 AU 15 0 3 0 2020-02-10
2020-02-10 比利时 BE 1 0 0 0 2020-02-10
2020-02-10 加拿大 CA 7 0 1 0 2020-02-10
2020-02-10 德国 DE 14 0 0 0 2020-02-10
... ... ... ... ... ... ... ... ... ... ... ...
2020-03-09 乌克兰 UA 1 0 0 0 2020-03-09
2020-03-09 美国 US 572 0 10 22 2020-03-09
2020-03-09 梵蒂冈 VA 1 0 0 0 2020-03-09
2020-03-09 越南 VN 30 0 16 0 2020-03-09
2020-03-09 南非 ZA 3 0 0 0 2020-03-09

1348 rows × 11 columns

In [9]:
# Save the overseas rows from 2020-02-10 onward to an Excel workbook.
df_oversea_recent.to_excel('epidemic_oversea_recent_filled.xlsx')
In [11]:
# Load the supplementary rows from Excel and give them the same layout as
# df_oversea_recent: a string 'dates' column (YYYY-MM-DD) for plot labels,
# the parsed 'date' column as index, and empty strings in place of missing
# values.
df_oversea_supp = (
    pd.read_excel('epidemic_supp.xlsx')
    # Vectorised formatting; assumes 'date' is read back as datetime64.
    .assign(dates=lambda supp: supp['date'].dt.strftime('%Y-%m-%d'))
    .set_index('date')
    .fillna(value="")
)
# info() prints its report itself and returns None, so it must not be wrapped
# in print() (that emitted a stray "None" line after the report).
df_oversea_supp.info()
df_oversea_supp
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 10 entries, 2020-02-10 to 2020-02-19
Data columns (total 11 columns):
country         10 non-null object
countryCode     10 non-null object
province        10 non-null object
provinceCode    10 non-null object
city            10 non-null object
cityCode        10 non-null object
confirmed       10 non-null int64
suspected       10 non-null int64
cured           10 non-null int64
dead            10 non-null int64
dates           10 non-null object
dtypes: int64(4), object(7)
memory usage: 960.0+ bytes
None
Out[11]:
country countryCode province provinceCode city cityCode confirmed suspected cured dead dates
date
2020-02-10 伊朗 IR 0 0 0 0 2020-02-10
2020-02-11 伊朗 IR 0 0 0 0 2020-02-11
2020-02-12 伊朗 IR 0 0 0 0 2020-02-12
2020-02-13 伊朗 IR 0 0 0 0 2020-02-13
2020-02-14 伊朗 IR 0 0 0 0 2020-02-14
2020-02-15 伊朗 IR 0 0 0 0 2020-02-15
2020-02-16 伊朗 IR 0 0 0 0 2020-02-16
2020-02-17 伊朗 IR 0 0 0 0 2020-02-17
2020-02-18 伊朗 IR 0 0 0 0 2020-02-18
2020-02-19 伊朗 IR 0 0 0 0 2020-02-19
In [12]:
# Combine the recent overseas rows with the supplementary rows and order the
# result by date. pd.concat replaces DataFrame.append, which was deprecated in
# pandas 1.4 and removed in pandas 2.0; sort_index() is chained so no frame is
# mutated in place.
df_oversea_recent_new = pd.concat([df_oversea_recent, df_oversea_supp]).sort_index()

df_oversea_recent_new
Out[12]:
country countryCode province provinceCode city cityCode confirmed suspected cured dead dates
date
2020-02-10 阿联酋 AE 7 0 0 0 2020-02-10
2020-02-10 伊朗 IR 0 0 0 0 2020-02-10
2020-02-10 越南 VN 14 0 3 0 2020-02-10
2020-02-10 美国 US 12 0 3 0 2020-02-10
2020-02-10 泰国 TH 32 0 8 0 2020-02-10
... ... ... ... ... ... ... ... ... ... ... ...
2020-03-09 埃及 EG 55 0 12 1 2020-03-09
2020-03-09 爱沙尼亚 EE 10 0 0 0 2020-03-09
2020-03-09 厄瓜多尔 EC 14 0 0 0 2020-03-09
2020-03-09 克罗地亚 HR 12 0 0 0 2020-03-09
2020-03-09 法罗群岛 FO 1 0 0 0 2020-03-09

1358 rows × 11 columns

In [13]:
# Animated bubble chart: deaths (x) against confirmed cases (y), one bubble per
# country sized by its confirmed count, one animation frame per day.
fig_oversea_recent = px.scatter(
    df_oversea_recent_new,
    x='dead',
    y='confirmed',
    size='confirmed',
    text='country',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Light24,
    animation_frame='dates',
    animation_group='country',
    hover_name='country',
    # Both axes are pinned to fixed ranges.
    range_x=[-10, 260],
    range_y=[0, 8000],
    size_max=50,
    template='plotly_white',
)
fig_oversea_recent.show()
In [ ]: